**** This do-file
*A) prepares the input datasets (departures by hour) for the shortest-path calculation (performed on the server)
** note: only the sample restrictions based on travel time and ship size are used
*B) converts the output from the server into routes by port pair and adds the Panama dummy



* ---- session reset and project paths ----
clear all

* root of the project repository (machine-specific path)
global main "/Users/ingahei/UiO Dropbox/IngaB Heiland/Ais project/Repository"
cd "$main"

* intermediate data folder and its sub-folders:
*   input   - files prepared for the server
*   soutput - files returned by the server
*   output  - per-file results derived from the server output
global data    "$main/data_intermediate"
global input   "$data/input514"
global soutput "$data/server_output"
global output  "$data/output514"




* ---------------------------------------------------------------------
* Build "$data/trips_hrs": for every departure port, arrival port and
* departure hour, keep the trip with the shortest travel time.
* A_time and D_time are hours since 1 Jan 2016, 00:00.
* ---------------------------------------------------------------------
use "$data/port_to_port_ship_bal_clus_small", clear

keep D_acid A_acid A_date D_date

* day of the year: 20453 is the Stata daily date of 31dec2015, so 1 Jan 2016 -> day 1
generate D_day = dofc(D_date) - 20453
generate A_day = dofc(A_date) - 20453

* hour of the day, attributed to the next hour if minutes >= 30 to minimize mismatch.
* hh() and mm() read the clock components from the datetime value itself, so the
* result does not depend on the display format of A_date and D_date
* (parsing the formatted string silently breaks when the format changes).
generate A_hr = hh(A_date)
replace  A_hr = A_hr + 1 if mm(A_date) > 29
generate A_time = (A_day - 1)*24 + A_hr

generate D_hr = hh(D_date)
replace  D_hr = D_hr + 1 if mm(D_date) > 29
generate D_time = (D_day - 1)*24 + D_hr

* drops the hour helpers plus A_date, A_day, D_date and D_day
drop A_hr D_hr A_da* D_da*

duplicates drop

* among trips with the same departure hour and port pair keep the fastest one
generate tt = A_time - D_time
egen mintt = min(tt), by(D_time A_acid D_acid)
drop if tt != mintt

drop tt mintt

* safeguard: make sure no identical rows are written
duplicates drop
save "$data/trips_hrs", replace


* make conversion dataset for actual departures:
* one row per hour of 2016, D_Ttime = 0, 1, ..., 8783.
* The grid is created from scratch instead of trimming another dataset,
* which would abort whenever that dataset has fewer than 8784 rows.
clear
set obs 8784 // the total number of hours in 2016 (366 days x 24)
generate D_Ttime = _n - 1
save "$data/help_hrs", replace


* For every departure port (ids 1 to 514) write two files to the input folder:
*   trips_hrs<id>  - the observed trips leaving that port
*   trips_Thrs<id> - for each hour of the year and each arrival port, the
*                    earliest arrival among trips departing at or after that
*                    hour, plus one row per hour in which the port "arrives"
*                    at itself at that same hour
forvalues port = 1/514 {

    * observed trips out of this port
    use "$data/trips_hrs", clear
    keep if D_acid == `port'
    compress
    save "$input/trips_hrs`port'", replace

    * pair every hour of the year with every observed trip of the port
    use "$data/help_hrs", clear
    cross using "$input/trips_hrs`port'"

    * a trip is only usable from hours at or before its departure
    keep if D_time >= D_Ttime

    * shortest time from the grid hour to arrival, by grid hour and port pair
    generate tt = A_time - D_Ttime
    egen mintt = min(tt), by(D_Ttime A_acid D_acid)
    keep if tt == mintt
    drop D_time tt mintt
    compress

    sort D_acid D_Ttime
    duplicates drop

    * add the hour grid itself: rows with missing ports and arrival time are
    * filled with the port's own id and the grid hour
    append using "$data/help_hrs"
    replace D_acid = `port' if D_acid == .
    replace A_acid = `port' if A_acid == .
    replace A_time = D_Ttime if A_time == .
    save "$input/trips_Thrs`port'", replace
}

* the hour grid is no longer needed
erase "$data/help_hrs.dta"




********* process output from server *******
* For every file returned by the server:
*   1. drop routes that end where they start and routes that are dominated
*      (same arrival port, later arrival but earlier departure);
*   2. keep the most frequent route per port pair, separately for the
*      pre and post period, with its mean travel time;
*   3. reshape the route into one row per leg (D_acid -> A_acid) and merge
*      the Panama information from XPanama_ids_small;
*   4. save the result under the same file name in the output folder.

local files: dir "$soutput" files "*.dta"

foreach file of local files {

    use "$soutput/`file'", clear

    * origin and final destination become the last two stops D15 and D16
    rename D_a D15_acid
    rename A_a D16_acid

    drop if D0_acid == D16_acid

    * Files without any remaining route are skipped, as in the other loops
    * over the server output; the collapse below cannot run on an empty dataset.
    if _N > 0 {

        * Within arrival port, ordered by arrival time: repeatedly drop rows
        * that depart earlier than the row before them, until none is left.
        * The order inside each arrival port is stated explicitly so the
        * comparison never depends on the order left by a previous command.
        local l = 1
        while `l' > 0 {
            bysort D16_acid (A_time D0_time): drop if D0_time[_n] < D0_time[_n-1] & D0_time[_n-1] != .
            local l = r(N_drop)
        }

        * find the most frequent route pre and post
        generate byte post = 0
        replace post = 1 if A_time > 184*24

        * N = number of times the identical sequence of stops occurs
        bysort *_acid post: generate N = _N

        egen mx = max(N), by(D0_acid D16_acid post)
        keep if N == mx // keep only the most frequent routes
        drop mx

        generate tt = A_time - D0_time

        * mean time over maximum frequency routes
        collapse (mean) tt N, by(*acid post)

        * n numbers the tied routes within port pair and period
        bysort D0_acid D16_acid post: generate n = _n

        generate DEP = D0_acid
        generate ARR = D16_acid

        reshape long D@_acid, i(DEP ARR n post) j(hop)

        * drop superfluous hops (a stop identical to the previous stop)
        bysort DEP ARR post n (hop): drop if D_acid == D_acid[_n-1] & D_acid[_n-1] != .

        * the next stop is the arrival port of the leg; the last stop has none
        bysort DEP ARR post n (hop): generate A_acid = D_acid[_n+1]
        drop if A_acid == .

        merge m:1 D_acid A_acid using "$data/XPanama_ids_small"
        drop if _merge == 2
        drop _merge

        save "$output/`file'", replace
    }
}




* Stack the per-port route files hrs<id>_12 into "$data/hrs_12_small" and
* remove the per-port files afterwards.
use "$output/hrs1_12", clear
forvalues i = 2/514 {
    * a port without a result file is reported and skipped
    capture noisily append using "$output/hrs`i'_12"
    * Erase a per-port file only after it was appended successfully:
    * erasing a file that does not exist would abort the do-file, and a
    * file that could not be appended must not be deleted.
    if _rc == 0 {
        erase "$output/hrs`i'_12.dta"
    }
}

* save the stacked data first, then remove the last per-port file
save "$data/hrs_12_small", replace
erase "$output/hrs1_12.dta"










*********** find the route with the largest capacity

* Step 1: total dwt_usage per port pair in the pre-period,
* saved as "$data/dwt_usage".
use "$data/port_to_port_ship_bal_clus_small", clear

* restrict to pre-period: arrival day of the year (1 Jan 2016 = 1) up to 184
keep if dofc(A_date) - 20453 <= 184

collapse (sum) dwt_usage, by(D_acid A_acid)
save "$data/dwt_usage", replace





* Step 2: for every server result file keep, per port pair, the pre-period
* route with the largest average dwt_usage over its legs, and stack the
* route-level Panama and teu variables of all files in "$data/maxC".

local files: dir "$soutput" files "*.dta"

* k counts the files already written to maxC. It is increased only after a
* successful save, so the first file that contributes data creates maxC and
* never appends a missing or stale copy, even if earlier files were skipped.
local k = 0
foreach file of local files {

    *local file hrs1_12.dta

    use "$soutput/`file'", clear

    * NOTE(review): the strict < excludes arrivals at exactly hour 184*24, while
    * the post dummy (A_time > 184*24) and the dwt_usage sample (day <= 184)
    * treat that hour as pre-period - confirm which cut-off is intended.
    keep if A_time < 184*24 // restrict to pre period

    * origin and final destination become the last two stops D15 and D16
    rename D_a D15_acid
    rename A_a D16_acid

    drop if D0_acid == D16_acid

    * skip files without any remaining pre-period route; the check comes after
    * the drop above so that an emptied dataset is skipped as well
    if _N > 0 {

        * Within arrival port, ordered by arrival time: repeatedly drop rows
        * that depart earlier than the row before them, until none is left.
        local l = 1
        while `l' > 0 {
            bysort D16_acid (A_time D0_time): drop if D0_time[_n] < D0_time[_n-1] & D0_time[_n-1] != .
            local l = r(N_drop)
        }

        * n numbers the routes of a port pair in order of departure time
        bysort D0_acid D16_acid (D0_time): generate n = _n

        generate DEP = D0_acid
        generate ARR = D16_acid

        reshape long D@_acid, i(DEP ARR n) j(hop)

        * drop superfluous hops (a stop identical to the previous stop)
        bysort DEP ARR n (hop): drop if D_acid == D_acid[_n-1] & D_acid[_n-1] != .

        * the next stop is the arrival port of the leg; the last stop has none
        bysort DEP ARR n (hop): generate A_acid = D_acid[_n+1]
        drop if A_acid == .

        merge m:1 D_acid A_acid using "$data/dwt_usage"
        drop if _merge == 2
        drop _merge

        * average capacity along the total route
        egen av_cap = mean(dwt_usage), by(DEP ARR n)

        * keep the route(s) with maximum average capacity per port pair
        egen mx_cap = max(av_cap), by(DEP ARR)
        keep if av_cap == mx_cap
        drop A_time D0_time

        merge m:1 D_acid A_acid using "$data/XPanama_ids_small"
        drop if _merge == 2
        drop _merge

        merge m:1 A_acid D_acid using "$data/teuNEOP_small"
        drop if _merge == 2
        drop _merge

        * route totals over legs; the summed XP is recoded 2 -> 0, 4 -> 0, 3 -> 1
        * NOTE(review): presumably an even number of flagged legs cancels out
        * and an odd number counts as one - confirm the coding of XP.
        collapse (sum) XP teu*, by(ARR DEP n)
        replace XP = 0 if inlist(XP, 2, 4)
        replace XP = 1 if XP == 3

        * average over the routes tied at maximum capacity
        collapse (mean) XP teu*, by(ARR DEP)
        rename ARR A_acid
        rename DEP D_acid

        if `k' > 0 {
            append using "$data/maxC"
        }
        save "$data/maxC", replace
        local k = `k' + 1
    }
}








** prepare detailed route output data used for alternative exposure measures and for computation of routes avoiding the Panama Canal
* Each server result file is cleaned as in the steps above (no period
* restriction, no reshape) and saved with the prefix full in the output folder.

local files: dir "$soutput" files "*.dta"
di `files'

foreach file of local files {

    use "$soutput/`file'", clear

    * origin and final destination become the last two stops D15 and D16
    rename D_a D15_acid
    rename A_a D16_acid

    * routes ending where they start are not needed
    drop if D0_acid == D16_acid

    * nothing is saved for a file without remaining routes
    if _N > 0 {

        * within arrival port, ordered by arrival time: repeatedly drop rows
        * that depart earlier than the row before them, until none is left
        sort D16_acid A_time D0_time
        local ndropped = 1
        while `ndropped' > 0 {
            bysort D16_acid: drop if D0_time[_n-1] != . & D0_time < D0_time[_n-1]
            local ndropped = r(N_drop)
        }

        * n numbers the routes of a port pair in order of departure time
        bysort D0_acid D16_acid (D0_time): generate n = _n

        generate DEP = D0_acid
        generate ARR = D16_acid

        save "$output/full`file'", replace
    }
}



* Stack the per-port detailed route files fullhrs<id>_12 into "$data/fullhrs12".
use "$output/fullhrs1_12", clear
forvalues port = 2/514 {
    * a missing per-port file is reported and skipped; the files for
    * ports 2 to 514 stay on disk (need for routesNP)
    capture noisily append using "$output/fullhrs`port'_12"
}
save "$data/fullhrs12", replace

* NOTE(review): only the file of port 1 is erased while the files of the other
* ports are kept for routesNP - confirm that routesNP does not need port 1.
erase "$output/fullhrs1_12.dta"


